# Required packages:
require(tidyverse)
require(sf)
require(ggthemes)
require(flextable)

# Load data:
# The cities data contains calculations of drive times between industry sites and cities in BW
load("01_Data/spatial_dimension_city_data.Rdata")
load("01_Data/spatial_dimension_data.Rdata")
map <- geojsonsf::geojson_sf("01_Data/landkreise_simplify200.geojson")

#### 1 - Calculate optimal locations ####
# Create merge basis on site level
model_base_summarized <- model_base %>% 
  group_by(id) %>%
  summarize(m_risk = mean(risk))

model_base_summarized$geometry <- NULL

# Merge sites, with cities
cities_model_base <- merge(model_base_summarized, cities1, by="id")

# Round site risk
cities_model_base$m_risk <- round(cities_model_base$m_risk)

# Create subsets by risk level
r1 <- subset(cities_model_base, m_risk == 1)
r2 <- subset(cities_model_base, m_risk == 2)
r3 <- subset(cities_model_base, m_risk == 3)

# Weighted dataset by risk level 
# (risk 1 = 3 times included, risk 2 = 2 times included, risk 3 = 1 time included)

cities_risk_weighted <- rbind(r1, r1, r1,
                              r2, r2,
                              r3)

# Convert drive time into hours for consistency
cities_risk_weighted$time <- as.numeric(cities_risk_weighted$time) 
cities_risk_weighted$time <- cities_risk_weighted$time/60
cities_risk_weighted$time <- cities_risk_weighted$time/60

# Aggregate drive time per city
# Extract minimal drive time
minimal_drive_time <- 
  cities_risk_weighted %>% 
  group_by(district, city) %>% 
  summarise(total_drive = sum(as.numeric(time))) %>%
  arrange(district, total_drive) %>%
  slice_min(total_drive)

total_drive_time <- cities_risk_weighted %>% 
  group_by(district, city) %>% 
  summarise(total_drive = sum(as.numeric(time))) %>%
  arrange(district, total_drive) 

optimal_location <- unique(merge(minimal_drive_time,cities1[c("city","lon","lat")], by="city", all.x=T))

cities_location <- 
  cities_risk_weighted %>% 
  select(City, from) %>% 
  unique() %>% 
  separate(from,into=c("lat","lon"),"   ")

# Alternative risk weighting
risk_opti <- rbind(r1,r1,r1,r1,r1,r1,
                   r2,r2,
                   r3)

risk_opti %>% group_by(district, city) %>%
  summarise(total_drive = sum(as.numeric(time))) %>%
  arrange(district, total_drive) %>%
  slice_min(total_drive)

#### 2 - Figure 3: Optimal locations ####
plot_df <- cbind(location_level, location_coordinates)
plot_df2 <- merge(plot_df, optimal_location, by="district")

# Create data set of connections 
optimal_location_connections <- cities1[cities1$city %in% optimal_location$city,]
optimal_location_connections$time <- as.numeric(optimal_location_connections$time) / 60
optimal_location_connections$time <- as.numeric(optimal_location_connections$time) / 60

optimal_location_connections <-
  optimal_location_connections %>% 
  group_by(X, Y, lon, lat, district) %>%
  summarize(time = mean(time))

# Create plot
p4 <- ggplot(map)+
  geom_sf(fill="#EFECE6", size=0.1, color="grey20")+
  coord_sf()+
  xlab("")+
  ylab("")+
  theme_void()+
  geom_point(aes(X,Y),data = plot_df2, shape=21,size=0.8)+
  labs(color="Distance (Hours)")+
  theme(legend.position = "left")+
  geom_segment(data = optimal_location_connections, aes(x = X, y = Y, xend = lon, yend = lat, color=time))+
  scale_color_viridis_c(direction=-1, limits=c(min(location_level$time), max(location_level$time)))+
  geom_point(aes(rp_lon, rp_lat),data=plot_df2, fill="#FFBF69",color="black", size=3, alpha= 0.9,shape=23)+
  geom_point(aes(lon, lat),data=plot_df2, fill="green",color="black", size=3, alpha= 0.9,shape=23)+
  theme(text = element_text(size = 15)) 

p4

pdf("02_Figures/figure03.pdf",width=8, height=8)
p4
dev.off()

#### 3 - Table A8 / Figure A9: Bootstrap procedure for assessment of uncertainty ####

# Create bootstrap function

boot_function <- function(data) {
  set.seed(1234)
  out <- list()
  sample_base <- unique(data$id) 
  sample_size <- as.numeric(length(sample_base)-1)
  
  for (i in 1:1000) {
    set.seed(i)
    drawn_sample <- data.frame(id=sample(sample_base,size=sample_size, replace=T))
    drawn_sample <- merge(drawn_sample, data, by ="id")
    
    out[[i]] <- drawn_sample %>% group_by(city) %>% 
      summarise(total_drive = sum(as.numeric(time))) %>%
      mutate(draw  = i) 
  }
  
  out <- bind_rows(out)
  return(out)  
}

# Split data into districts and apply bootstrap function to each split
cities_risk_weighted_split <- split(cities_risk_weighted, cities_risk_weighted$district)
bootstrap_results <- lapply(cities_risk_weighted_split, boot_function)

# Convert results into data frame
bootstrap_results_df <- bind_rows(bootstrap_results, .id="column_label")
bootstrap_results_df$district <- bootstrap_results_df$column_label

# Label districts
bootstrap_results_df$district[bootstrap_results_df$district == 1] <- "Stuttgart"
bootstrap_results_df$district[bootstrap_results_df$district == 2] <- "Karlsruhe"
bootstrap_results_df$district[bootstrap_results_df$district == 3] <- "Freiburg"
bootstrap_results_df$district[bootstrap_results_df$district == 4] <- "Tübingen"


# Create plot of estimated cumulative drive time
pdf("02_Figures/appendix_A9.pdf", height =12, width=12)
bootstrap_results_df %>%
  group_by(district,city) %>% 
  mutate(mean_time =mean(total_drive)) %>% 
  group_by(district) %>% 
  arrange(mean_time) %>% 
  slice(1:10000) %>% 
  mutate(city = fct_reorder(city, mean_time)) %>% 
  ggplot(aes(
    y = city,
    x = total_drive
  )) + 
  ggdist::stat_halfeye(alpha = 0.5, .width=c(0.50,0.95)) +
  facet_wrap( ~ district, scales = "free") +
  ylab("") +
  xlab("Cummulative Travel Time") +
  ggthemes::theme_base()+
  theme(legend.position = "none",
        axis.text.y = element_text(size=10))+
  scale_color_grey()+
  scale_fill_grey()+
  theme(plot.background = element_rect(color = "white"))

dev.off()


bootstrap_mean_drive_city <-
  bootstrap_results_df %>%
  group_by(district,city) %>% 
  summarize(mean_time =mean(total_drive))


appendix_A8 <- merge(bootstrap_mean_drive_city, total_drive_time, by= "city")

appendix_A8 %>% 
  arrange(district.x, (total_drive)) %>%
  mutate(mean_time = round(mean_time, 2),
         total_drive = round(total_drive, 2)) %>%
  select(district.x, city, total_drive, mean_time) %>%
  as.matrix %>%
  htmlTable::htmlTable(file = "03_Tables/appendix_A8.html")

rm(list = ls())